This script visualizes the full dataset and highlights the sub-populations in the gland cells.

Read in the data

library(scran)
library(scater)
library(DropletUtils)
library(openxlsx)
library(Rtsne)
library(pheatmap)
library(viridis)
library(sceasy) #devtools::install_github("cellgeni/sceasy")
library(reticulate)
# library(umap)

# Load the project's helper functions from the author's local checkout.
source("~/Dropbox/Postdoc/git/BEOrigin/Analysis/Functions/auxiliary.R")

# Fix the random seed so that stochastic steps further down are reproducible.
set.seed(123456)

All data

Fix the annotation of all data and add annotation for cell type.

# Read in the normalized and batch-corrected reads
sce.all <- readRDS("~/Dropbox/Postdoc/2019-12-29_BE2020/Website_data/old/alldata.rds")

# Colour used for every tissue. The palette is written out as a named
# character vector, so it no longer depends on the loaded object containing
# exactly eight distinct tissues (the previous pre-sized vector did).
annot.col <- c(
  NSCJ = "#F39B7F",
  BSCJ = "#DC0000",
  NE   = "#8B0000",
  NG   = "#4DBBD5",
  BE   = "#00A087",
  ND   = "#3C5488",
  SMG  = "#B09C85",
  GM   = "#BFE7E1"
)

# Report tissues that would otherwise be drawn without a defined colour.
tissue.seen <- unique(as.character(colData(sce.all)$Tissue))
tissue.unknown <- setdiff(tissue.seen[!is.na(tissue.seen)], names(annot.col))
if (length(tissue.unknown) > 0) {
  warning("No colour defined for tissue(s): ",
          paste(tissue.unknown, collapse = ", "), call. = FALSE)
}

# First look at the stored t-SNE embedding, coloured by tissue.
tsne.corrected <- ggplot(data.frame(tsne1 = reducedDims(sce.all)$TSNE[, 1],
                                    tsne2 = reducedDims(sce.all)$TSNE[, 2],
                                    tissue = colData(sce.all)$Tissue)) +
  geom_point(aes(tsne1, tsne2, colour = tissue)) +
  scale_color_manual(values = annot.col) +
  theme_minimal()
tsne.corrected

# Replace samples name: keep only the "Patient..._..._..." part of the
# original sample identifier.
colData(sce.all)$Sample <- sub(".+(Patient.+?_.+?_.+?)_.+", "\\1", colData(sce.all)$Sample, perl = TRUE)


# Introduce cell type for known cells
sce.good <- readRDS("~/Dropbox/Postdoc/2019-12-29_BE2020/All_corrected_sce_filtered.rds")

colData(sce.good)$Sample <- sub(".+(Patient.+?_.+?_.+?)_.+", "\\1", colData(sce.good)$Sample, perl = TRUE)

# Cells are identified by sample plus barcode. The separator keeps two
# different (sample, barcode) pairs from collapsing into the same key.
key.all <- paste(colData(sce.all)$Sample, colData(sce.all)$Barcode, sep = "|")
key.good <- paste(colData(sce.good)$Sample, colData(sce.good)$Barcode, sep = "|")

# Position of every annotated cell in the full object, computed once.
idx <- match(key.good, key.all)
found <- !is.na(idx)
if (!all(found)) {
  # An NA subscript would abort the assignment below, so unmatched cells are
  # skipped and reported instead.
  warning(sum(!found), " annotated cell(s) were not found in the full dataset and were skipped",
          call. = FALSE)
}

# Every cell starts as "Not_assessed"; annotated cells get their label copied.
for (field in c("cell_type", "cell_type_secondary", "tissue_type")) {
  annotation <- rep("Not_assessed", ncol(sce.all))
  # as.character() guards against factor columns being copied as integer codes.
  annotation[idx[found]] <- as.character(colData(sce.good)[[field]])[found]
  colData(sce.all)[[field]] <- annotation
}


# Put the annotation columns first and keep all continuous columns after them.
# The selection is positional and relies on the column layout of the loaded object.
col.order <- c(4, 3, 1, 15:17, 6:14)
colData(sce.all) <- colData(sce.all)[, col.order]


# Randomise the order of the cells (fixed seed, so the permutation is reproducible)
set.seed(50014)
n.cells <- ncol(sce.all)
jumbled <- sample(seq_len(n.cells), n.cells, replace = FALSE)
sce.all <- sce.all[, jumbled]
# colData(sce.all)<-colData(sce.all)[jumbled,]
# reducedDims(sce.all)$TSNE<-reducedDims(sce.all)$TSNE[jumbled,]

# Turn the grouping columns into factors.
# Tissue gets a fixed level order; Patient and Sample are ordered alphabetically.
tissue.order <- c("NSCJ", "BSCJ", "NE", "NG", "BE", "ND", "SMG", "GM")
colData(sce.all)$Tissue <- factor(colData(sce.all)$Tissue, levels = tissue.order)

patient.ids <- colData(sce.all)$Patient
colData(sce.all)$Patient <- factor(patient.ids, levels = sort(unique(patient.ids)))

sample.ids <- colData(sce.all)$Sample
colData(sce.all)$Sample <- factor(sample.ids, levels = sort(unique(sample.ids)))

# Level order of the primary cell-type annotation, grouped as in the original listing.
cell.levels <- c(
  "Basal", "Suprabasal", "Intermediate", "Superficial",

  "Undifferentiated", "Endocrine", "Foveolar_Intermediate",
  "Foveolar_differentiated", "Parietal", "Chief",

  "Columnar_Undifferentiated", "Columnar_Intermediate", "Columnar_differentiated",

  "Enterocytes_Intermediate", "Enterocytes_differentiated", "Paneth",

  "Goblet",

  "KRT5_cells", "KRT5.KRT7_cells", "KRT7_cells", "MUC5B_cells",

  "Mucous", "Oncocytes", "Duct_Intercalating", "Myo-epithelial", "Unknown.Doublets",

  "Immune", "Stromal", "Squamous_Esophagus",

  "Not_assessed"
)

primary.type <- factor(colData(sce.all)$cell_type, levels = cell.levels)
# Levels 18 to 21 are KRT5_cells, KRT5.KRT7_cells, KRT7_cells and MUC5B_cells;
# they are relabelled C1 to C4 (in that order).
levels(primary.type)[18:21] <- paste0("C", 1:4)
colData(sce.all)$cell_type <- primary.type

# Level order of the secondary (finer) cell-type annotation.
# KRT7_cells is listed before MUC5B_cells so that the order agrees with the
# primary annotation and with the other sections of this script.
cell.levels.2 <- c(
  "Basal", "Suprabasal", "Suprabasal_Dividing", "Intermediate", "Superficial",

  "Undifferentiated", "Undifferentiated_Dividing",
  "Foveolar_Intermediate", "Foveolar_differentiated",

  "Endocrine_NEUROG3", "Endocrine_GHRL", "Endocrine_CHGA", "Endocrine_NEUROD1",
  "Parietal", "Chief",

  "Columnar_Undifferentiated", "Columnar_Undifferentiated_Dividing",
  "Columnar_Intermediate", "Columnar_differentiated",

  "KRT5_cells", "KRT5.KRT7_cells", "KRT7_cells", "MUC5B_cells",

  "Enterocytes_Intermediate", "Enterocytes_differentiated", "Paneth", "Goblet",

  "Mucous_MUC5B_High", "Oncocytes_MUC5B_Low", "Duct_Intercalating", "Myo-epithelial",

  "Immune_T-cells", "Immune_B-cells", "Immune_Macrophages",

  "Stromal_CALD1_cells", "Stromal_GNG11_cells", "Stromal_ADH1B_cells",

  "Squamous_Esophagus", "Unknown.Doublets", "Not_assessed"
)

# Relabel the four KRT5/KRT7/MUC5B populations as C1 to C4. The mapping is done
# by name, so it is identical to the one applied to `cell_type`
# (KRT7_cells -> C3, MUC5B_cells -> C4). The previous positional rename
# assigned C3 to MUC5B_cells and C4 to KRT7_cells here.
anon.labels <- c(KRT5_cells = "C1", KRT5.KRT7_cells = "C2",
                 KRT7_cells = "C3", MUC5B_cells = "C4")

secondary.type <- factor(colData(sce.all)$cell_type_secondary, levels = cell.levels.2)
levels(secondary.type)[match(names(anon.labels), levels(secondary.type))] <- unname(anon.labels)
colData(sce.all)$cell_type_secondary <- secondary.type

colData(sce.all)$tissue_type <- factor(colData(sce.all)$tissue_type, levels = c("Squamous", "Columnar", "Glandular", "NonEpithelial", "Not_assessed"))

# Use gene symbols as gene identifiers

# Keep only the first row for every gene symbol
first.symbol <- !duplicated(rowData(sce.all)$Symbol)
sce.all <- sce.all[first.symbol, ]

# Move Symbol to the row names so that genes can be looked up by symbol
rownames(sce.all) <- rowData(sce.all)$Symbol

# Store the log-space expression values in the counts assay
# (the cells themselves were already randomised above)
counts(sce.all) <- logcounts(sce.all) # [,jumbled]

# Directory the website files are written to. The path is expanded once so
# that all writers below receive the same absolute location instead of mixing
# "~" with a hard-coded home directory.
web.dir <- path.expand("~/Dropbox/Postdoc/2019-12-29_BE2020/Website_data")

# Sanity check: expression of one marker gene on the t-SNE embedding.
gene <- "KRT5"
# which() drops NA comparisons, so genes without a symbol cannot select a row.
gene.row <- which(rowData(sce.all)$Symbol == gene)
p.gene.expression <- ggplot(data.frame(tSNE1 = reducedDims(sce.all)$TSNE[, 1],
                                       tSNE2 = reducedDims(sce.all)$TSNE[, 2],
                                       gene = counts(sce.all)[gene.row, ])) +
  # gene = logcounts(sce.all)[rowData(sce.all)$Symbol == gene,][jumbled]))  +
  geom_point(aes(tSNE1, tSNE2, colour = gene), size = 0.5) +
  scale_colour_viridis(option = "A", name = "log2(Expr)") +
  # "none" replaces the deprecated guides(alpha = FALSE)
  guides(alpha = "none") + ggtitle(gene) +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        axis.line = element_line(colour = "grey"))
p.gene.expression

# Final t-SNE of all cells, coloured by tissue.
tsne.corrected <- ggplot(data.frame(tsne1 = reducedDims(sce.all)$TSNE[, 1],
                                    tsne2 = reducedDims(sce.all)$TSNE[, 2],
                                    Tissue = colData(sce.all)$Tissue)) +
  geom_point(aes(tsne1, tsne2, colour = Tissue)) +
  scale_color_manual(values = annot.col) +
  theme_void()
tsne.corrected

ggsave(file.path(web.dir, "alldata.pdf"),
       tsne.corrected,
       width = 8, height = 7, useDingbats = FALSE)

# Export the object as AnnData (h5ad) and as RDS.
sceasy:::convertFormat(sce.all, from="sce", to="anndata",
                       outFile = file.path(web.dir, "alldata.h5ad"))
## Warning in .regularise_df(as.data.frame(SummarizedExperiment::rowData(obj)), :
## Dropping single category variables:Type
## AnnData object with n_obs × n_vars = 50569 × 56852
##     obs: 'Tissue', 'Patient', 'Sample', 'cell_type', 'cell_type_secondary', 'tissue_type', 'total_features_by_counts', 'log10_total_features_by_counts', 'total_counts', 'log10_total_counts', 'pct_counts_in_top_50_features', 'pct_counts_in_top_100_features', 'pct_counts_in_top_200_features', 'pct_counts_in_top_500_features', 'Clusters'
##     var: 'ID', 'Symbol'
##     obsm: 'X_tsne'
saveRDS(sce.all, file.path(web.dir, "alldata.rds"))

All data High quality

Fix the annotation of all high quality data.

# Read in the normalized and batch-corrected reads
sce.all <- readRDS("~/Dropbox/Postdoc/2019-12-29_BE2020/Website_data/old/alldatahighquality.rds")

# Colour used for every tissue. The palette is written out as a named
# character vector, so it no longer depends on the loaded object containing
# exactly six distinct tissues (the previous pre-sized vector did).
annot.col <- c(
  NSCJ = "#F39B7FFF",
  BSCJ = "#DC0000FF",
  NE   = "darkred",
  NG   = "#4DBBD5FF",
  BE   = "#00A087FF",
  # ND = "#3C5488FF",
  SMG  = "#B09C85FF"
  # GM = colorRampPalette(c("white", "#00A087FF"))(17)[c(5)]
)

# Report tissues that would otherwise be drawn without a defined colour.
tissue.seen <- unique(as.character(colData(sce.all)$Tissue))
tissue.unknown <- setdiff(tissue.seen[!is.na(tissue.seen)], names(annot.col))
if (length(tissue.unknown) > 0) {
  warning("No colour defined for tissue(s): ",
          paste(tissue.unknown, collapse = ", "), call. = FALSE)
}

# First look at the stored t-SNE embedding, coloured by tissue.
tsne.corrected <- ggplot(data.frame(tsne1 = reducedDims(sce.all)$TSNE[, 1],
                                    tsne2 = reducedDims(sce.all)$TSNE[, 2],
                                    tissue = colData(sce.all)$Tissue)) +
  geom_point(aes(tsne1, tsne2, colour = tissue)) +
  scale_color_manual(values = annot.col) +
  theme_minimal()
tsne.corrected

# Replace samples name: keep only the "Patient..._..._..." part of the identifier
sample.pattern <- ".+(Patient.+?_.+?_.+?)_.+"
colData(sce.all)$Sample <- sub(sample.pattern, "\\1", colData(sce.all)$Sample, perl = TRUE)


# Put the annotation columns first and keep all continuous columns after them.
# The selection is positional and relies on the column layout of the loaded object.
col.order <- c(4, 3, 1, 17:19, 21, 6:13)
colData(sce.all) <- colData(sce.all)[, col.order]


# Randomise the order of the cells (fixed seed, so the permutation is reproducible)
set.seed(50014)
n.cells <- ncol(sce.all)
jumbled <- sample(seq_len(n.cells), n.cells, replace = FALSE)
sce.all <- sce.all[, jumbled]
# colData(sce.all)<-colData(sce.all)[jumbled,]
# reducedDims(sce.all)$TSNE<-reducedDims(sce.all)$TSNE[jumbled,]

# Turn the grouping columns into factors.
# Tissue gets a fixed level order; Patient and Sample are ordered alphabetically.
tissue.order <- c("NSCJ", "BSCJ", "NE", "NG", "BE", "SMG")
colData(sce.all)$Tissue <- factor(colData(sce.all)$Tissue, levels = tissue.order)

patient.ids <- colData(sce.all)$Patient
colData(sce.all)$Patient <- factor(patient.ids, levels = sort(unique(patient.ids)))

sample.ids <- colData(sce.all)$Sample
colData(sce.all)$Sample <- factor(sample.ids, levels = sort(unique(sample.ids)))

# Level order of the primary cell-type annotation, grouped as in the original listing.
cell.levels <- c(
  "Basal", "Suprabasal", "Intermediate", "Superficial",

  "Undifferentiated", "Endocrine", "Foveolar_Intermediate",
  "Foveolar_differentiated", "Parietal", "Chief",

  "Columnar_Undifferentiated", "Columnar_Intermediate", "Columnar_differentiated",

  "Enterocytes_Intermediate", "Enterocytes_differentiated", "Paneth",

  "Goblet",

  "KRT5_cells", "KRT5.KRT7_cells", "KRT7_cells", "MUC5B_cells",

  "Mucous", "Oncocytes", "Duct_Intercalating", "Myo-epithelial", "Unknown.Doublets",

  "Immune", "Stromal", "Squamous_Esophagus" # ,

  # "Not_assessed"
)

primary.type <- factor(colData(sce.all)$cell_type, levels = cell.levels)
# Levels 18 to 21 are KRT5_cells, KRT5.KRT7_cells, KRT7_cells and MUC5B_cells;
# they are relabelled C1 to C4 (in that order).
levels(primary.type)[18:21] <- paste0("C", 1:4)
colData(sce.all)$cell_type <- primary.type

# Level order of the secondary (finer) cell-type annotation.
cell.levels.2 <- c(
  "Basal", "Suprabasal", "Suprabasal_Dividing", "Intermediate", "Superficial",

  "Undifferentiated", "Undifferentiated_Dividing",
  "Foveolar_Intermediate", "Foveolar_differentiated",

  "Endocrine_NEUROG3", "Endocrine_GHRL", "Endocrine_CHGA", "Endocrine_NEUROD1",
  "Parietal", "Chief",

  "Columnar_Undifferentiated", "Columnar_Undifferentiated_Dividing",
  "Columnar_Intermediate", "Columnar_differentiated",

  "KRT5_cells", "KRT5.KRT7_cells", "KRT7_cells", "MUC5B_cells",

  "Enterocytes_Intermediate", "Enterocytes_differentiated", "Paneth", "Goblet",

  "Mucous_MUC5B_High", "Oncocytes_MUC5B_Low", "Duct_Intercalating", "Myo-epithelial",

  "Immune_T-cells", "Immune_B-cells", "Immune_Macrophages",

  "Stromal_CALD1_cells", "Stromal_GNG11_cells", "Stromal_ADH1B_cells",

  "Squamous_Esophagus", "Unknown.Doublets" # ,
  # "Not_assessed"
)

# Relabel the four KRT5/KRT7/MUC5B populations as C1 to C4.
# In this vector they sit at positions 20 to 23; the previous positional rename
# used 18:21 (the positions valid for `cell.levels`) and therefore relabelled
# Columnar_Intermediate, Columnar_differentiated, KRT5_cells and
# KRT5.KRT7_cells instead. Matching by name avoids depending on positions.
anon.labels <- c(KRT5_cells = "C1", KRT5.KRT7_cells = "C2",
                 KRT7_cells = "C3", MUC5B_cells = "C4")

secondary.type <- factor(colData(sce.all)$cell_type_secondary, levels = cell.levels.2)
levels(secondary.type)[match(names(anon.labels), levels(secondary.type))] <- unname(anon.labels)
colData(sce.all)$cell_type_secondary <- secondary.type


colData(sce.all)$tissue_type <- factor(colData(sce.all)$tissue_type, levels = c("Squamous", "Columnar", "Glandular", "NonEpithelial"))

# Change clustering output into factors
# colData(sce.all)$Clusters <- factor(colData(sce.all)$Clusters, levels =  sort(unique(colData(sce.all)$Clusters)))
# colData(sce.all)$Tissue_cluster <- factor(colData(sce.all)$Tissue_cluster, levels =  sort(unique(colData(sce.all)$Tissue_cluster)))
# colData(sce.all)$Global_cluster <- factor(colData(sce.all)$Global_cluster, levels =  sort(unique(colData(sce.all)$Global_cluster)))
# Confidence becomes a factor whose levels are sorted in decreasing order.
colData(sce.all)$confidence <- factor(colData(sce.all)$confidence, levels =  sort(unique(colData(sce.all)$confidence), decreasing = TRUE))

# colnames(colData(sce.all))[8] <- "Clustering_per_sample"


# Use gene symbols as gene identifiers

# Keep only the first row for every gene symbol
first.symbol <- !duplicated(rowData(sce.all)$Symbol)
sce.all <- sce.all[first.symbol, ]

# Move Symbol to the row names so that genes can be looked up by symbol
rownames(sce.all) <- rowData(sce.all)$Symbol

# Store the log-space expression values in the counts assay
# (the cells themselves were already randomised above)
counts(sce.all) <- logcounts(sce.all) # [,jumbled]

# Directory the website files are written to. The path is expanded once so
# that all writers below receive the same absolute location instead of mixing
# "~" with a hard-coded home directory.
web.dir <- path.expand("~/Dropbox/Postdoc/2019-12-29_BE2020/Website_data")

# Sanity check: expression of one marker gene on the t-SNE embedding.
gene <- "KRT5"
# which() drops NA comparisons, so genes without a symbol cannot select a row.
gene.row <- which(rowData(sce.all)$Symbol == gene)
p.gene.expression <- ggplot(data.frame(tSNE1 = reducedDims(sce.all)$TSNE[, 1],
                                       tSNE2 = reducedDims(sce.all)$TSNE[, 2],
                                       gene = counts(sce.all)[gene.row, ])) +
  # gene = logcounts(sce.all)[rowData(sce.all)$Symbol == gene,][jumbled]))  +
  geom_point(aes(tSNE1, tSNE2, colour = gene), size = 0.5) +
  scale_colour_viridis(option = "A", name = "log2(Expr)") +
  # "none" replaces the deprecated guides(alpha = FALSE)
  guides(alpha = "none") + ggtitle(gene) +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        axis.line = element_line(colour = "grey"))
p.gene.expression

# Final t-SNE of the high-quality cells, coloured by tissue.
tsne.corrected <- ggplot(data.frame(tsne1 = reducedDims(sce.all)$TSNE[, 1],
                                    tsne2 = reducedDims(sce.all)$TSNE[, 2],
                                    Tissue = colData(sce.all)$Tissue)) +
  geom_point(aes(tsne1, tsne2, colour = Tissue)) +
  scale_color_manual(values = annot.col) +
  theme_void()
tsne.corrected

ggsave(file.path(web.dir, "alldatahighquality.pdf"),
       tsne.corrected,
       width = 8, height = 7, useDingbats = FALSE)

# Export the object as AnnData (h5ad) and as RDS.
sceasy:::convertFormat(sce.all, from="sce", to="anndata",
                       outFile = file.path(web.dir, "alldatahighquality.h5ad"))
## AnnData object with n_obs × n_vars = 39882 × 56852
##     obs: 'Tissue', 'Patient', 'Sample', 'cell_type', 'cell_type_secondary', 'tissue_type', 'confidence', 'total_features_by_counts', 'log10_total_features_by_counts', 'total_counts', 'log10_total_counts', 'pct_counts_in_top_50_features', 'pct_counts_in_top_100_features', 'pct_counts_in_top_200_features', 'pct_counts_in_top_500_features'
##     var: 'ID', 'Symbol'
##     obsm: 'X_tsne'
saveRDS(sce.all, file.path(web.dir, "alldatahighquality.rds"))

Individual Tissues

Fix the annotation of individual tissues

# Directory the website files are read from and written to. The path is
# expanded once so that every reader and writer below receives the same
# absolute location instead of mixing "~" with a hard-coded home directory.
web.dir <- path.expand("~/Dropbox/Postdoc/2019-12-29_BE2020/Website_data")

# Colour used for every tissue. A literal named vector replaces the previous
# pre-sized vector, which was padded with unnamed "FALSE" entries whenever a
# file contained fewer than eight tissues.
annot.col <- c(
  NSCJ = "#F39B7FFF",
  BSCJ = "#DC0000FF",
  NE   = "darkred",
  NG   = "#4DBBD5FF",
  BE   = "#00A087FF",
  ND   = "#3C5488FF",
  SMG  = "#B09C85FF",
  GM   = colorRampPalette(c("white", "#00A087FF"))(17)[5]
)

# Level order of the primary cell-type annotation. Defined once, outside the
# loop, because it does not depend on the tissue.
cell.levels <- c(
  "Basal", "Suprabasal", "Intermediate", "Superficial",

  "Undifferentiated", "Endocrine", "Foveolar_Intermediate",
  "Foveolar_differentiated", "Parietal", "Chief",

  "Columnar_Undifferentiated", "Columnar_Intermediate", "Columnar_differentiated",

  "Enterocytes_Intermediate", "Enterocytes_differentiated", "Paneth",

  "Goblet",

  "C1", "C2", "C3", "C4",

  "KRT5_cells", "KRT5.KRT7_cells", "KRT7_cells", "MUC5B_cells",

  "Mucous", "Oncocytes", "Duct_Intercalating", "Myo-epithelial", "Unknown.Doublets",

  "Immune", "Stromal", "Squamous_Esophagus" # ,

  # "Not_assessed"
)

# Level order of the secondary (finer) cell-type annotation.
cell.levels.2 <- c(
  "Basal", "Suprabasal", "Suprabasal_Dividing", "Intermediate", "Superficial",

  "Undifferentiated", "Undifferentiated_Dividing",
  "Foveolar_Intermediate", "Foveolar_differentiated",

  "Endocrine_NEUROG3", "Endocrine_GHRL", "Endocrine_CHGA", "Endocrine_NEUROD1",
  "Parietal", "Chief",

  "Columnar_Undifferentiated", "Columnar_Undifferentiated_Dividing",
  "Columnar_Intermediate", "Columnar_differentiated",

  "KRT5_cells", "KRT5.KRT7_cells", "KRT7_cells", "MUC5B_cells",

  "Enterocytes_Intermediate", "Enterocytes_differentiated", "Paneth", "Goblet",

  "Mucous_MUC5B_High", "Oncocytes_MUC5B_Low", "Duct_Intercalating", "Myo-epithelial",

  "Immune_T-cells", "Immune_B-cells", "Immune_Macrophages",

  "Stromal_CALD1_cells", "Stromal_GNG11_cells", "Stromal_ADH1B_cells",

  "Squamous_Esophagus", "Unknown.Doublets" # ,
  # "Not_assessed"
)

# Labels that replace the KRT5/KRT7/MUC5B populations in the NSCJ object.
anon.labels <- c(KRT5_cells = "C1", KRT5.KRT7_cells = "C2",
                 KRT7_cells = "C3", MUC5B_cells = "C4")

# Clean up, plot and export one object per tissue.
for (tissue in c("SMG", "NSCJ", "NE", "NG", "ND", "BE")) {
  # Read in the normalized and batch-corrected reads
  sce.all <- readRDS(file.path(web.dir, "old", paste0(tissue, ".rds")))

  # First look at the stored t-SNE embedding, coloured by tissue.
  tsne.corrected <- ggplot(data.frame(tsne1 = reducedDims(sce.all)$TSNE[, 1],
                                      tsne2 = reducedDims(sce.all)$TSNE[, 2],
                                      tissue = colData(sce.all)$Tissue)) +
    geom_point(aes(tsne1, tsne2, colour = tissue)) +
    scale_color_manual(values = annot.col) +
    theme_minimal()
  print(tsne.corrected)

  # Replace samples name: keep only the "Patient..._..._..." part of the identifier
  colData(sce.all)$Sample <- sub(".+(Patient.+?_.+?_.+?)_.+", "\\1", colData(sce.all)$Sample, perl = TRUE)

  # change the order of columns and keep all continuous columns
  # (positional; relies on the column layout of the loaded object)
  colData(sce.all) <- colData(sce.all)[, c(3, 1, 17:19, 21, 6:13)]

  # Randomise the order of the cells (fixed seed, so the permutation is reproducible)
  set.seed(50014)
  jumbled <- sample(seq_len(ncol(sce.all)), ncol(sce.all), replace = FALSE)
  sce.all <- sce.all[, jumbled]
  # colData(sce.all)<-colData(sce.all)[jumbled,]
  # reducedDims(sce.all)$TSNE<-reducedDims(sce.all)$TSNE[jumbled,]

  # Change coldata into factors
  # colData(sce.all)$Tissue <- factor(colData(sce.all)$Tissue, levels = c("NSCJ", "BSCJ", "NE", "NG", "BE", "ND", "SMG"))
  colData(sce.all)$Patient <- factor(colData(sce.all)$Patient, levels = sort(unique(colData(sce.all)$Patient)))
  colData(sce.all)$Sample <- factor(colData(sce.all)$Sample, levels = sort(unique(colData(sce.all)$Sample)))

  # Only the levels that occur in this tissue are kept, in the global order.
  colData(sce.all)$cell_type <- factor(colData(sce.all)$cell_type, levels = cell.levels[cell.levels %in% unique(colData(sce.all)$cell_type)])
  print(levels(colData(sce.all)$cell_type))

  colData(sce.all)$cell_type_secondary <- factor(colData(sce.all)$cell_type_secondary, levels = cell.levels.2[cell.levels.2 %in% unique(colData(sce.all)$cell_type_secondary)])
  print(levels(colData(sce.all)$cell_type_secondary))

  if (tissue == "NSCJ") {
    # Relabel by name instead of by position (previously levels 11:14), so the
    # rename stays correct if the set of levels present in the data changes.
    secondary.type <- colData(sce.all)$cell_type_secondary
    hit <- match(names(anon.labels), levels(secondary.type))
    levels(secondary.type)[hit[!is.na(hit)]] <- unname(anon.labels)[!is.na(hit)]
    colData(sce.all)$cell_type_secondary <- secondary.type
  }

  colData(sce.all)$tissue_type <- factor(colData(sce.all)$tissue_type, levels = c("Squamous", "Columnar", "Glandular", "NonEpithelial"))

  # Change clustering output into factors
  # colData(sce.all)$Clusters <- factor(colData(sce.all)$Clusters, levels =  sort(unique(colData(sce.all)$Clusters)))
  # colData(sce.all)$Tissue_cluster <- factor(colData(sce.all)$Tissue_cluster, levels =  sort(unique(colData(sce.all)$Tissue_cluster)))
  # colData(sce.all)$Global_cluster <- factor(colData(sce.all)$Global_cluster, levels =  sort(unique(colData(sce.all)$Global_cluster)))
  # Confidence becomes a factor whose levels are sorted in decreasing order.
  colData(sce.all)$confidence <- factor(colData(sce.all)$confidence, levels = sort(unique(colData(sce.all)$confidence), decreasing = TRUE))

  # colnames(colData(sce.all))[8] <- "Clustering_per_sample"

  # Use gene symbols as gene identifiers

  # Keep only the first row for every gene symbol
  sce.all <- sce.all[!duplicated(rowData(sce.all)$Symbol), ]

  # Move Symbol to the row names so that genes can be looked up by symbol
  rownames(sce.all) <- rowData(sce.all)$Symbol

  # Store the log-space expression values in the counts assay
  counts(sce.all) <- logcounts(sce.all) # [,jumbled]

  # Sanity check: expression of one marker gene on the t-SNE embedding.
  gene <- "KRT5"
  # which() drops NA comparisons, so genes without a symbol cannot select a row.
  gene.row <- which(rowData(sce.all)$Symbol == gene)
  p.gene.expression <- ggplot(data.frame(tSNE1 = reducedDims(sce.all)$TSNE[, 1],
                                         tSNE2 = reducedDims(sce.all)$TSNE[, 2],
                                         gene = counts(sce.all)[gene.row, ])) +
    # gene = logcounts(sce.all)[rowData(sce.all)$Symbol == gene,][jumbled]))  +
    geom_point(aes(tSNE1, tSNE2, colour = gene), size = 0.5) +
    scale_colour_viridis(option = "A", name = "log2(Expr)") +
    # "none" replaces the deprecated guides(alpha = FALSE)
    guides(alpha = "none") + ggtitle(gene) +
    theme(panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          panel.background = element_blank(),
          axis.line = element_line(colour = "grey"))
  print(p.gene.expression)

  # Final t-SNE of this tissue, coloured by primary cell type (default colours).
  tsne.corrected <- ggplot(data.frame(tsne1 = reducedDims(sce.all)$TSNE[, 1],
                                      tsne2 = reducedDims(sce.all)$TSNE[, 2],
                                      Cell_Type = colData(sce.all)$cell_type)) +
    geom_point(aes(tsne1, tsne2, colour = Cell_Type)) +
    # scale_color_manual(values = annot.col) +
    theme_void()
  print(tsne.corrected)

  ggsave(file.path(web.dir, paste0(tissue, ".pdf")),
         tsne.corrected,
         width = 8, height = 7, useDingbats = FALSE)

  # Export the object as AnnData (h5ad) and as RDS.
  sceasy:::convertFormat(sce.all, from="sce", to="anndata",
                         outFile = file.path(web.dir, paste0(tissue, ".h5ad")))
  saveRDS(sce.all, file.path(web.dir, paste0(tissue, ".rds")))
}

## [1] "Mucous"             "Oncocytes"          "Duct_Intercalating"
## [4] "Myo-epithelial"     "Immune"             "Stromal"           
##  [1] "Mucous_MUC5B_High"   "Oncocytes_MUC5B_Low" "Duct_Intercalating" 
##  [4] "Myo-epithelial"      "Immune_T-cells"      "Immune_B-cells"     
##  [7] "Immune_Macrophages"  "Stromal_CALD1_cells" "Stromal_GNG11_cells"
## [10] "Stromal_ADH1B_cells"

##  [1] "Basal"                   "Suprabasal"             
##  [3] "Intermediate"            "Superficial"            
##  [5] "Undifferentiated"        "Endocrine"              
##  [7] "Foveolar_Intermediate"   "Foveolar_differentiated"
##  [9] "C1"                      "C2"                     
## [11] "C3"                      "C4"                     
## [13] "Immune"                  "Stromal"                
##  [1] "Basal"                   "Suprabasal"             
##  [3] "Suprabasal_Dividing"     "Intermediate"           
##  [5] "Superficial"             "Undifferentiated"       
##  [7] "Foveolar_Intermediate"   "Foveolar_differentiated"
##  [9] "Endocrine_GHRL"          "Endocrine_CHGA"         
## [11] "KRT5_cells"              "KRT5.KRT7_cells"        
## [13] "KRT7_cells"              "MUC5B_cells"            
## [15] "Immune_T-cells"          "Immune_B-cells"         
## [17] "Immune_Macrophages"      "Stromal_CALD1_cells"    
## [19] "Stromal_GNG11_cells"

## Warning in .regularise_df(as.data.frame(SummarizedExperiment::colData(obj)), :
## Dropping single category variables:confidence

## [1] "Basal"        "Suprabasal"   "Intermediate" "Superficial"  "Immune"      
## [1] "Basal"               "Suprabasal"          "Suprabasal_Dividing"
## [4] "Intermediate"        "Superficial"         "Immune_T-cells"     
## [7] "Immune_Macrophages"

## [1] "Undifferentiated"        "Endocrine"              
## [3] "Foveolar_Intermediate"   "Foveolar_differentiated"
## [5] "Parietal"                "Chief"                  
## [7] "Immune"                  "Stromal"                
##  [1] "Undifferentiated"          "Undifferentiated_Dividing"
##  [3] "Foveolar_Intermediate"     "Foveolar_differentiated"  
##  [5] "Endocrine_GHRL"            "Endocrine_CHGA"           
##  [7] "Endocrine_NEUROD1"         "Parietal"                 
##  [9] "Chief"                     "Immune_T-cells"           
## [11] "Immune_B-cells"            "Immune_Macrophages"       
## [13] "Stromal_CALD1_cells"       "Stromal_GNG11_cells"

## Warning in .regularise_df(as.data.frame(SummarizedExperiment::colData(obj)), :
## Dropping single category variables:confidence

## [1] "Undifferentiated"           "Endocrine"                 
## [3] "Enterocytes_Intermediate"   "Enterocytes_differentiated"
## [5] "Paneth"                     "Goblet"                    
## [7] "Immune"                     "Stromal"                   
##  [1] "Undifferentiated"           "Undifferentiated_Dividing" 
##  [3] "Endocrine_CHGA"             "Enterocytes_Intermediate"  
##  [5] "Enterocytes_differentiated" "Paneth"                    
##  [7] "Goblet"                     "Immune_T-cells"            
##  [9] "Immune_B-cells"             "Immune_Macrophages"        
## [11] "Stromal_CALD1_cells"        "Stromal_GNG11_cells"

## Warning in .regularise_df(as.data.frame(SummarizedExperiment::colData(obj)), :
## Dropping single category variables:confidence

## [1] "Endocrine"                 "Columnar_Undifferentiated"
## [3] "Columnar_Intermediate"     "Columnar_differentiated"  
## [5] "Goblet"                    "Immune"                   
## [7] "Stromal"                  
##  [1] "Endocrine_NEUROG3"                  "Columnar_Undifferentiated"         
##  [3] "Columnar_Undifferentiated_Dividing" "Columnar_Intermediate"             
##  [5] "Columnar_differentiated"            "Goblet"                            
##  [7] "Immune_T-cells"                     "Immune_B-cells"                    
##  [9] "Immune_Macrophages"                 "Stromal_CALD1_cells"               
## [11] "Stromal_GNG11_cells"

## Warning in .regularise_df(as.data.frame(SummarizedExperiment::colData(obj)), :
## Dropping single category variables:confidence

End Matter

To finish, get the session info:

# Print the R version, platform and attached/loaded package versions for this run.
sessionInfo()
## R version 3.6.2 (2019-12-12)
## Platform: x86_64-redhat-linux-gnu (64-bit)
## Running under: Fedora 31 (Workstation Edition)
## 
## Matrix products: default
## BLAS/LAPACK: /usr/lib64/R/lib/libRblas.so
## 
## locale:
##  [1] LC_CTYPE=en_GB.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_GB.UTF-8        LC_COLLATE=en_GB.UTF-8    
##  [5] LC_MONETARY=en_GB.UTF-8    LC_MESSAGES=en_GB.UTF-8   
##  [7] LC_PAPER=en_GB.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_GB.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] destiny_3.0.1               edgeR_3.28.0               
##  [3] limma_3.42.2                dbscan_1.1-5               
##  [5] princurve_2.1.4             dynamicTreeCut_1.63-1      
##  [7] sceasy_0.0.6                reticulate_1.14            
##  [9] viridis_0.5.1               viridisLite_0.3.0          
## [11] pheatmap_1.0.12             Rtsne_0.15                 
## [13] openxlsx_4.1.4              DropletUtils_1.6.1         
## [15] scater_1.14.6               ggplot2_3.2.1              
## [17] scran_1.14.6                SingleCellExperiment_1.8.0 
## [19] SummarizedExperiment_1.16.1 DelayedArray_0.12.2        
## [21] BiocParallel_1.20.1         matrixStats_0.55.0         
## [23] Biobase_2.46.0              GenomicRanges_1.38.0       
## [25] GenomeInfoDb_1.22.0         IRanges_2.20.2             
## [27] S4Vectors_0.24.3            BiocGenerics_0.32.0        
## 
## loaded via a namespace (and not attached):
##   [1] readxl_1.3.1             RcppEigen_0.3.3.7.0      igraph_1.2.4.2          
##   [4] lazyeval_0.2.2           sp_1.3-2                 RcppHNSW_0.2.0          
##   [7] digest_0.6.24            htmltools_0.4.0          magrittr_1.5            
##  [10] R.utils_2.9.2            xts_0.12-0               colorspace_1.4-1        
##  [13] rappdirs_0.3.1           haven_2.2.0              xfun_0.12               
##  [16] dplyr_0.8.4              crayon_1.3.4             RCurl_1.98-1.1          
##  [19] jsonlite_1.6.1           hexbin_1.28.1            zoo_1.8-7               
##  [22] glue_1.3.1               gtable_0.3.0             zlibbioc_1.32.0         
##  [25] XVector_0.26.0           car_3.0-6                BiocSingular_1.2.1      
##  [28] Rhdf5lib_1.8.0           DEoptimR_1.0-8           HDF5Array_1.14.2        
##  [31] abind_1.4-5              VIM_5.1.0                scales_1.1.0            
##  [34] ggplot.multistats_1.0.0  ggthemes_4.2.0           Rcpp_1.0.3              
##  [37] laeken_0.5.1             dqrng_0.2.1              foreign_0.8-72          
##  [40] rsvd_1.0.2               proxy_0.4-23             vcd_1.4-5               
##  [43] RColorBrewer_1.1-2       ellipsis_0.3.0           pkgconfig_2.0.3         
##  [46] R.methodsS3_1.8.0        farver_2.0.3             nnet_7.3-12             
##  [49] locfit_1.5-9.1           tidyselect_1.0.0         labeling_0.3            
##  [52] rlang_0.4.7              munsell_0.5.0            cellranger_1.1.0        
##  [55] tools_3.6.2              ranger_0.12.1            evaluate_0.14           
##  [58] stringr_1.4.0            yaml_2.2.1               knitr_1.28              
##  [61] zip_2.0.4                robustbase_0.93-5        purrr_0.3.3             
##  [64] formatR_1.7              R.oo_1.23.0              compiler_3.6.2          
##  [67] beeswarm_0.2.3           curl_4.3                 e1071_1.7-3             
##  [70] smoother_1.1             tibble_3.0.3             statmod_1.4.33          
##  [73] stringi_1.4.5            RSpectra_0.16-0          forcats_0.4.0           
##  [76] lattice_0.20-38          Matrix_1.2-18            vctrs_0.3.4             
##  [79] pillar_1.4.3             lifecycle_0.2.0          lmtest_0.9-37           
##  [82] BiocNeighbors_1.4.1      data.table_1.12.8        bitops_1.0-6            
##  [85] irlba_2.3.3              R6_2.4.1                 pcaMethods_1.78.0       
##  [88] gridExtra_2.3            rio_0.5.16               vipor_0.4.5             
##  [91] codetools_0.2-16         boot_1.3-23              MASS_7.3-51.4           
##  [94] assertthat_0.2.1         rhdf5_2.30.1             withr_2.1.2             
##  [97] GenomeInfoDbData_1.2.2   hms_0.5.3                grid_3.6.2              
## [100] tidyr_1.0.2              class_7.3-15             rmarkdown_2.1           
## [103] DelayedMatrixStats_1.8.0 carData_3.0-3            TTR_0.23-6              
## [106] scatterplot3d_0.3-41     ggbeeswarm_0.6.0